/*LIS Cross-section Data center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part I: Inequality, poverty, and social policy*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/

/*Exercise 4: Inequality: The Gini Index*/

/* Session options for the whole job: tolerate missing formats (NOFMTERR),  */
/* keep notes and source echo out of the log (NONOTES NOSOURCE), print      */
/* output without date or page number and left-aligned, show variable       */
/* labels, and use the maximum line size and page size.                     */
OPTIONS NOFMTERR NONOTES NOSOURCE NODATE NONUMBER NOCENTER LABEL LS=MAX PS=MAX;

/*----------------------------------------------------------------------
  Macro Gini: weighted Gini index of one variable.

  Inputs are read from macro variables that the caller sets with LET
  statements before invoking the macro (the macro has no parameters):
      dataset  name of the input data set (it is sorted IN PLACE by var)
      var      numeric variable whose inequality is measured
      wgt      numeric weight variable

  Output:
      WORK.Gini  one observation with the single variable gini, which is
                 also printed through PROC MEANS under the current TITLE.

  Method: observations are ordered by var; for each one the cumulative
  weight share and the cumulative weighted-variable share are computed
  in percent, and the trapezoid terms
      (weight share step) * (current share + previous share)
  are accumulated. Both shares are percentages, hence the division by
  100 * 100 = 10000 before subtracting from 1.
----------------------------------------------------------------------*/
%MACRO Gini ;
	/* The cumulative shares below require ascending order of the variable */
	PROC SORT DATA=&dataset ;
	  BY &var ;
	RUN ;

	DATA Gini (KEEP=gini) ;
	    /* First pass, executed once: grand totals of the weight and of    */
	    /* weight * variable. Sum statements retain swt and swtey.         */
	    IF _N_ = 1 THEN
	        DO UNTIL (last) ;
	            SET &dataset END=last ;
	            swt   + &wgt ;
	            swtey + (&wgt*&var) ;
	        END ;

	    /* Second pass over the SAME data set as the first pass. The       */
	    /* original read the fixed data set "current" here, which only     */
	    /* worked when the caller happened to pass dataset = current.      */
	    SET &dataset END=eof ;

	    IF _N_ = 1 THEN
	        DO ;
	            prewt = 0 ;
	            preey = 0 ;
	            up    = 0 ;
	            sum   = 0 ;
	        END ;

	    /* Running totals and cumulative shares in percent */
	    cwt    + &wgt ;
	    cwtey  + (&var*&wgt) ;
	    pcwt   = cwt / swt * 100 ;
	    pcwtey = cwtey / swtey * 100 ;

	    /* Trapezoid term for this observation, added to the running sum */
	    up     = (pcwt-prewt) * (pcwtey+preey) ;
	    sum + up ;

	    /* Remember the current shares for the next observation */
	    prewt = pcwt ;
	    preey = pcwtey ;
	    RETAIN prewt preey ;

	    /* Only the last observation carries the complete sum */
	    IF eof THEN
	        DO ;
	            gini = 1 - (sum / 10000) ;
	            OUTPUT ;
	        END ;
	RUN ;

	/* Print the single gini value */
	PROC MEANS DATA=Gini MEAN ;
	RUN ;
%MEND Gini ;
TITLE "";

/* Step 1: read the household file, drop incomplete records, build the   */
/* person-level weight, and bottom-code disposable income at zero.       */
DATA current ;
  SET &gt06h (KEEP=dhi hifactor hpub_i hpub_u hpub_a hiprivate hxitsc hpopwgt nhhmem grossnet did) ;

  /* Flag and drop households with any missing income component */
  miss_comp = 0 ;
  IF (dhi=. | hpub_i=. | hpub_a=. | hpub_u=. | hiprivate=. | hxitsc=.) THEN miss_comp = 1 ;
  IF miss_comp = 1 THEN DELETE ;

  /* Person weight = household weight * number of household members */
  ipwgt = hpopwgt*nhhmem ;

  /* Bottom-coding: negative disposable income is set to zero. dhi is    */
  /* never missing here because such records were deleted above.         */
  dhitb = dhi ;
  IF dhi < 0 THEN dhitb = 0 ;

  /* Log income for the outlier bounds. Zero income gets log = 0; LOG is */
  /* not called on it, so no invalid-argument condition is raised.       */
  IF dhitb > 0 THEN dhilog = LOG(dhitb) ;
  ELSE dhilog = 0 ;
RUN ;

PROC SORT DATA=current ;
  BY did dhilog ;
RUN ;

/* Step 2: weighted quartiles of log income */
PROC UNIVARIATE DATA=current NOPRINT ;
  VAR dhilog ;
  WEIGHT hpopwgt ;
  OUTPUT OUT=temp P25=q25 P75=q75 ;
RUN ;

/* Store the quartiles in macro variables b (P25) and t (P75).           */
/* CALL SYMPUTX converts the numbers with a wider field than CALL SYMPUT */
/* and strips blanks, so the bounds are not truncated to 12 characters.  */
DATA _NULL_ ;
  SET temp ;
  CALL SYMPUTX("b", q25) ;
  CALL SYMPUTX("t", q75) ;
RUN ;

/* Step 3: top- and bottom-code at 3 interquartile ranges beyond the     */
/* quartiles on the log scale, then derive per-capita and equivalised    */
/* (square-root scale) income from the bounded value.                    */
DATA current ;
  SET current ;
  /* Parentheses keep the expressions valid when a quartile is negative */
  iqr         = (&t) - (&b) ;
  upper_bound = (&t) + (iqr * 3) ;
  lower_bound = (&b) - (iqr * 3) ;
  IF dhitb > exp(upper_bound) THEN dhitb = exp(upper_bound) ;
  IF dhitb < exp(lower_bound) THEN dhitb = exp(lower_bound) ;

  dhipc = dhitb / nhhmem       ;
  edhi  = dhitb / SQRT(nhhmem) ;
RUN ;

/* All three Gini computations run on the prepared data set "current";  */
/* only the income variable and the matching weight change per call.    */
%LET dataset = current ;

/* Total household income, weighted by the household weight */
TITLE "Household income" ;
%LET var = dhitb ;
%LET wgt = hpopwgt ;
%gini

/* Income per household member, weighted by the person weight */
TITLE "Income per Capita";
%LET var = dhipc ;
%LET wgt = ipwgt ;
%gini

/* Income divided by the square root of household size, person weight */
TITLE "Equivalised income";
%LET var = edhi ;
%LET wgt = ipwgt ;
%gini
